### CGQ 11 Replication Data Preparation
### 2 December 2011

## Read the combined data file; character columns stay character (no factors)
all.x <- read.csv("data/CombinedData.csv", stringsAsFactors = FALSE)

## Columns carried forward into the working data frame x
x <- all.x[c(
  "MiniLoc",
  "MiniTime",
  "MiniIsVtrRaceMiss",
  "MiniIsVtrWhite",
  "MiniIsVtrBlack",
  "MiniIsVtrHisp",
  "MiniIsVtrAsian",
  "MiniIsVtrOthRace",
  "VtrHmLang",
  "Educ",
  "MiniIsVtrFem",
  "MiniVtrAge"
)]

## ShowID is held in a separate one-column data frame (same row names as all.x)
y <- data.frame(ShowID = all.x[, "ShowID"])

## omit cases with NA's
## na.idx holds the positions of rows with at least one missing value.
## The drop is guarded: with an empty index, x[-na.idx, ] would select zero
## rows and silently discard the whole data set.
## Row names of the surviving rows are left untouched by the subsetting.
na.idx <- which(!complete.cases(x))
if (length(na.idx) > 0) {
	x <- x[-na.idx, ]  ## n = 4322
}

## Attach ShowID to x by matching on row names (by = 0 means "row.names").
## Rows of y without a matching row name in x are not kept.
## NOTE(review): merge() sorts on the key by default, so the row order of x
## after this step follows the row names rather than the original file order.
x <- merge(x, y, by = 0, all.y = FALSE)

## merge() returns the key as a "Row.names" column; remove it again
x <- x[, names(x) != "Row.names"]

### Assign Tr/Co:
## Location names (values of MiniLoc) that count as treated.
## NOTE(review): "Boston Trinity Academy " carries a trailing space --
## presumably this matches the raw data; do not trim without checking.
treated.loc <- c(
  "Gavin School",
  "Boston Middle School Academy (Formally Endicott Elementary School)",
  "Mozart School",
  "Saint Anne's School",
  "James A. Garfield School",
  "St. Mark's School Hall",
  "Roslindale Branch Library",
  "Boston Trinity Academy ",
  "Beethoven School",
  "Curtis Hall",
  "James J. Chittick School",
  "Honan-Allston Branch Library",
  "Marcus Garvey Gardens"
)

## Location names (values of MiniLoc) that count as control.
control.loc <- c(
  "Thomas A. Edison School",
  "William Howard Taft School",
  "Patrick O'Hearn School",
  "Phineas Bates School",
  "Lower Mills Library",
  "George H. Conley School",
  "Foley Building",
  "McKinley School",
  "Early Learning Center",
  "Woodrow Wilson School",
  "Area E Police Station",
  "Thomas J Kenny School",
  "Shelburne Center"
)
                   
## Treatment indicator: 1 for treated locations, 0 for control locations,
## NA for any location that appears in neither list.
x$isTreated <- NA
x$isTreated <- replace(x$isTreated, x$MiniLoc %in% treated.loc, 1)
x$isTreated <- replace(x$isTreated, x$MiniLoc %in% control.loc, 0)

## Keep only rows whose location was classified
x <- subset(x, !is.na(isTreated))

## Indicator for home language recorded as "Engl" (1) versus anything else (0)
x$isVtrHmEngl <- 0
x$isVtrHmEngl <- replace(x$isVtrHmEngl, x$VtrHmLang == "Engl", 1)

## Matched-pair identifier (1-13). Each entry below lists the two MiniLoc
## values that form one pair; the position in the list is the pair number.
## Locations that belong to no pair get NA. local() keeps the helper objects
## out of the workspace.
x$matchPair <- local({
  pair.members <- list(
    c("James J. Chittick School", "Woodrow Wilson School"),                                            # 1
    c("Beethoven School", "George H. Conley School"),                                                  # 2
    c("Boston Middle School Academy (Formally Endicott Elementary School)", "Early Learning Center"),  # 3
    c("Marcus Garvey Gardens", "Shelburne Center"),                                                    # 4
    c("James A. Garfield School", "Thomas A. Edison School"),                                          # 5
    c("Boston Trinity Academy ", "Lower Mills Library"),                                               # 6
    c("Mozart School", "Area E Police Station"),                                                       # 7
    c("Roslindale Branch Library", "Phineas Bates School"),                                            # 8
    c("St. Mark's School Hall", "Patrick O'Hearn School"),                                             # 9
    c("Gavin School", "Foley Building"),                                                               # 10
    c("Honan-Allston Branch Library", "William Howard Taft School"),                                   # 11
    c("Curtis Hall", "McKinley School"),                                                               # 12
    c("Saint Anne's School", "Thomas J Kenny School")                                                  # 13
  )
  loc.names <- unlist(pair.members)
  ## two locations per pair, so each pair number is repeated twice
  loc.pair <- rep(seq_along(pair.members), each = 2)
  as.numeric(loc.pair[match(x$MiniLoc, loc.names)])
})

## Choose pair with largest number of units, and well-balanced 188 to 202
## %in% (rather than ==) never yields NA, so no all-NA rows can be selected.
x.min <- x[x$matchPair %in% 11, ]

## Convert the clock time in MiniTime to minutes elapsed since 7:00.
## Dropping the ":" turns e.g. "7:05" into the integer 705 (hours * 100 +
## minutes), which is then split back into hours and minutes.
## NOTE(review): assumes MiniTime is "H:MM" / "HH:MM" -- confirm against data.
## Vectorised over the whole column, so an empty subset is handled as well.
x.min$timeInt <- as.integer(gsub(":", "", x.min$MiniTime, fixed = TRUE))
x.min$minsPost7 <- (floor(x.min$timeInt / 100) - 7) * 60 + (x.min$timeInt %% 100)

## Sort by minutes past 7:00 and number the rows in that order
o <- order(x.min$minsPost7)
x.pr.sort <- x.min[o, ]
x.pr.sort$myid <- seq_len(nrow(x.pr.sort)) ## assign IDs

## Clean up so that only x.pr.sort is left behind. The name list is filtered
## through ls() so that rm() does not warn about temporaries (such as a loop
## index) that were never created.
rm(x.min)
rm(list = intersect(
  c("i", "na.idx", "o", "y", "control.loc", "treated.loc", "all.x", "x"),
  ls()
))
